 package com.mijie.homi.search.util;

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;

///**
// * Licensed to the Apache Software Foundation (ASF) under one or more
// * contributor license agreements.  See the NOTICE file distributed with
// * this work for additional information regarding copyright ownership.
// * The ASF licenses this file to You under the Apache License, Version 2.0
// * (the "License"); you may not use this file except in compliance with
// * the License.  You may obtain a copy of the License at
// *
// *     http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// */
//import java.io.IOException;
//import java.util.BitSet;
//
//import org.apache.lucene.index.AtomicReaderContext;
//import org.apache.lucene.index.IndexReader;
//import org.apache.lucene.index.Term;
//import org.apache.lucene.index.TermDocs;
//import org.apache.lucene.index.TermEnum;
//import org.apache.lucene.search.DocIdSet;
//import org.apache.lucene.search.Filter;
//import org.apache.lucene.util.Bits;
//import org.apache.lucene.util.OpenBitSet;
//
 /**
  * Placeholder {@link Filter} named after Lucene's duplicate-removal filter.
  *
  * <p>The class currently carries no state and no de-duplication logic: its only
  * method is an unimplemented stub. An earlier implementation (written against an
  * older {@code IndexReader}-based API) is kept as commented-out code in this file.
  */
 public class DuplicateFilter extends Filter {

	/**
	 * Stub implementation that always returns {@code null}; neither argument is read.
	 *
	 * <p>NOTE(review): under Lucene's {@code Filter} contract a {@code null} result is
	 * presumably treated as "no documents accepted", so a search using this filter
	 * would match nothing -- confirm against the Lucene version in use.
	 *
	 * @param context    the per-segment reader context (currently unused)
	 * @param acceptDocs the documents permitted by the caller (currently unused)
	 * @return always {@code null}
	 * @throws IOException declared by the overridden method; never thrown here
	 */
	@Override
	public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
		// Not implemented yet: no DocIdSet is built for the segment.
		return null;
	}

 }
//{
//	
//	String fieldName;
//	
//	/**
//	 * KeepMode determines which document id to consider as the master, all others being 
//	 * identified as duplicates. Selecting the "first occurrence" can potentially save on IO.
//	 */
//	int keepMode=KM_USE_FIRST_OCCURRENCE;
//	public static final int KM_USE_FIRST_OCCURRENCE=1;
//	public static final int KM_USE_LAST_OCCURRENCE=2;
//	
//	/**
//	 * "Full" processing mode starts by setting all bits to false and only setting bits
//	 * for documents that contain the given field and are identified as non-duplicates.
//
//	 * "Fast" processing sets all bits to true, then unsets all duplicate docs found for the
//	 * given field. This approach avoids the need to read TermDocs for terms that are seen
//	 * to have a document frequency of exactly "1" (i.e. no duplicates). While a potentially
//	 * faster approach, the downside is that the bitsets produced will include bits set for
//	 * documents that do not actually contain the field given.
//	 * 
//	 */
//	int processingMode=PM_FULL_VALIDATION;
//	public static final int PM_FULL_VALIDATION=1;
//	public static final int PM_FAST_INVALIDATION=2;
//	
//
//	
//	public DuplicateFilter(String fieldName)
//	{
//		this(fieldName, KM_USE_LAST_OCCURRENCE,PM_FULL_VALIDATION);
//	}
//	
//
//	public DuplicateFilter(String fieldName, int keepMode, int processingMode)
//	{
//		this.fieldName = fieldName;
//		this.keepMode = keepMode;
//		this.processingMode = processingMode;
//	}
//
//  public DocIdSet getDocIdSet(IndexReader reader) throws IOException
//	{
//		if(processingMode==PM_FAST_INVALIDATION)
//		{
//			return fastBits(reader);
//		}
//		else
//		{
//			return correctBits(reader);
//		}
//	}
//	
//  private OpenBitSet correctBits(IndexReader reader) throws IOException
//	{
//		
//    OpenBitSet bits=new OpenBitSet(reader.maxDoc()); //assume all are INvalid
//		Term startTerm=new Term(fieldName);
//		TermEnum te = reader.terms(startTerm);
//		if(te!=null)
//		{
//			Term currTerm=te.term();
//			while((currTerm!=null)&&(currTerm.field()==startTerm.field())) //term fieldnames are interned
//			{
//				int lastDoc=-1;
//				//set non duplicates
//				TermDocs td = reader.termDocs(currTerm);
//				if(td.next())
//				{
//					if(keepMode==KM_USE_FIRST_OCCURRENCE)
//					{
//						bits.set(td.doc());
//					}
//					else
//					{
//						do
//						{
//							lastDoc=td.doc();
//						}while(td.next());
//						bits.set(lastDoc);
//					}
//				}
//				if(!te.next())
//				{
//					break;
//				}
//				currTerm=te.term();
//			}
//		}
//		return bits;
//	}
//	
//  private OpenBitSet fastBits(IndexReader reader) throws IOException
//	{
//		
//    OpenBitSet bits=new OpenBitSet(reader.maxDoc());
//		bits.set(0,reader.maxDoc()); //assume all are valid
//		Term startTerm=new Term(fieldName);
//		TermEnum te = reader.terms(startTerm);
//		if(te!=null)
//		{
//			Term currTerm=te.term();
//			
//			while((currTerm!=null)&&(currTerm.field()==startTerm.field())) //term fieldnames are interned
//			{
//				if(te.docFreq()>1)
//				{
//					int lastDoc=-1;
//					//unset potential duplicates
//					TermDocs td = reader.termDocs(currTerm);
//					td.next();
//					if(keepMode==KM_USE_FIRST_OCCURRENCE)
//					{
//						td.next();
//					}
//					do
//					{
//						lastDoc=td.doc();
//            bits.clear(lastDoc);
//					}while(td.next());
//					if(keepMode==KM_USE_LAST_OCCURRENCE)
//					{
//						//restore the last bit
//						bits.set(lastDoc);
//					}					
//				}
//				if(!te.next())
//				{
//					break;
//				}
//				currTerm=te.term();
//			}
//		}
//		return bits;
//	}
//
//	/**
//	 * @param args
//	 * @throws IOException 
//	 * @throws Exception 
//	 */
//	public static void main(String[] args) throws Exception
//	{
////		IndexReader r=IndexReader.open("/indexes/personCentricAnon");
//////		IndexReader r=IndexReader.open("/indexes/enron");
////		long start=System.currentTimeMillis();
//////		DuplicateFilter df = new DuplicateFilter("threadId",KM_USE_FIRST_OCCURRENCE, PM_FAST_INVALIDATION);
//////		DuplicateFilter df = new DuplicateFilter("threadId",KM_USE_LAST_OCCURRENCE, PM_FAST_INVALIDATION);
////		DuplicateFilter df = new DuplicateFilter("vehicle.vrm",KM_USE_LAST_OCCURRENCE, PM_FAST_INVALIDATION);
//////		DuplicateFilter df = new DuplicateFilter("title",USE_LAST_OCCURRENCE);
//////		df.setProcessingMode(PM_SLOW_VALIDATION);
////		BitSet b = df.bits(r);
////		long end=System.currentTimeMillis()-start;
////		System.out.println(b.cardinality()+" in "+end+" ms ");
//
//	}
//
//
//	public String getFieldName()
//	{
//		return fieldName;
//	}
//
//
//	public void setFieldName(String fieldName)
//	{
//		this.fieldName = fieldName;
//	}
//
//
//	public int getKeepMode()
//	{
//		return keepMode;
//	}
//
//
//	public void setKeepMode(int keepMode)
//	{
//		this.keepMode = keepMode;
//	}
//
//
//	public boolean equals(Object obj)
//	{
//		if(this == obj)
//			return true;
//		if((obj == null) || (obj.getClass() != this.getClass()))
//			return false;
//		DuplicateFilter other = (DuplicateFilter)obj;
//		return keepMode == other.keepMode &&
//		processingMode == other.processingMode &&
//			(fieldName == other.fieldName || (fieldName != null && fieldName.equals(other.fieldName)));
//	}
//
//
//
//	public int hashCode()
//	{
//		int hash = 217;
//		hash = 31 * hash + keepMode;
//		hash = 31 * hash + processingMode;
//		hash = 31 * hash + fieldName.hashCode();
//		return hash;	
//	}
//
//
//	public int getProcessingMode()
//	{
//		return processingMode;
//	}
//
//
//	public void setProcessingMode(int processingMode)
//	{
//		this.processingMode = processingMode;
//	}
//
//
//	@Override
//	public DocIdSet getDocIdSet(AtomicReaderContext arg0, Bits arg1)
//			throws IOException {
//		// TODO Auto-generated method stub
//		return null;
//	}
//	
//	
//
//}
